pacman::p_load(purrr,tibble,readr,dplyr,readxl,stringr,tidyr,stringi)
rm(list=ls())
################################################################################

################################################## Argentina

############ Cattle stocks (source: census 2002, 2008, 2018)

province_list <- c('buenosaires', 'catamarca', 'chaco', 'chubut', 'cordoba', 'corrientes', 'entrerios',
                   'formosa', 'jujuy', 'lapampa', 'larioja', 'mendoza', 'misiones', 'neuquen', 'rionegro',
                   'salta', 'sanjuan', 'sanluis', 'santacruz', 'santafe', 'santiagodelestero',
                   'tierradelfuego', 'tucuman')
province_name <- c('Buenos Aires', 'Catamarca', 'Chaco', 'Chubut', 'Cordoba', 'Corrientes', 'Entre Rios',
                   'Formosa', 'Jujuy', 'La Pampa', 'La Rioja', 'Mendoza', 'Misiones', 'Neuquen', 'Rio Negro',
                   'Salta', 'San Juan', 'San Luis', 'Santa Cruz', 'Santa Fe', 'Santiago del Estero',
                   'Tierra del Fuego', 'Tucuman')

#2002
for (n_p in 0:22) {

  df <- read_excel(paste0('../../data/production/raw/cna2002/cabezas/', province_list[n_p + 1], '.xls'), sheet = 1, .name_repair = "unique_quiet")
  df$state <- province_name[n_p + 1]
  df$year <- 2002
  if (ncol(df) == 17) {
    colnames(df) <- c('county', 'space1', 'space2',
                      'total_cattlestock', 'female calves', 'male calves',
                      'young heifers', 'heifers',
                      'cows', 'young steers', 'steers',
                      'young bulls', 'bulls', 'ox', 'unaccounted',
                      'state', 'year')
    df <- df %>% dplyr::select(-space1, -space2)
    df <- df %>% slice(9:n())  
  } else {
    colnames(df) <- c('county',
                      'total_cattlestock', 'female calves', 'male calves',
                      'young heifers', 'heifers',
                      'cows', 'young steers', 'steers',
                      'young bulls', 'bulls', 'ox', 'unaccounted',
                      'state', 'year')
    df <- df %>% slice(9:n())
  }
  df <- df %>% mutate(across(everything(), ~ gsub("-", "0", .)))
  numeric_cols <- c('total_cattlestock', 'female calves', 'male calves',
                    'young heifers', 'heifers', 'cows', 'young steers', 'steers',
                    'young bulls', 'bulls', 'ox', 'unaccounted', 'year')
  df <- df %>% mutate(across(all_of(numeric_cols), as.numeric))
  if (n_p == 0) {  df02 <- df } else { df02 <- bind_rows(df02, df) }
}

#2008
df_list <- list()
for (n_p in 0:22) {
  
  df <- readxl::read_excel(paste0('../../data/production/raw/cna2008/cna08_', province_list[n_p + 1], '.xls'), sheet = 'cuadro 4.2', .name_repair = "unique_quiet")
  df$state <- province_name[n_p + 1]
  df$year <- 2008
  if (ncol(df) == 17) {
    colnames(df) <- c('county', 'space1', 'space2', 'total_cattlestock', 'female calves', 'male calves', 
                      'young heifers', 'heifers', 'cows', 'young steers', 'steers', 'young bulls', 
                      'bulls', 'ox', 'unaccounted', 'state', 'year')
    df <- df |>  dplyr::select(-space1, -space2) |>  dplyr::slice(9:n())
  } else {
    colnames(df) <- c('county', 'total_cattlestock', 'female calves', 'male calves', 
                      'young heifers', 'heifers', 'cows', 'young steers', 'steers', 
                      'young bulls', 'bulls', 'ox', 'unaccounted', 'state', 'year')
    df <- df |>  dplyr::slice(9:n())
  }
  df[df == "-"] <- "0"
  numeric_cols <- c('total_cattlestock', 'female calves', 'male calves', 'young heifers', 
                    'heifers', 'cows', 'young steers', 'steers', 'young bulls', 'bulls', 'ox', 
                    'unaccounted', 'year')
  df[numeric_cols] <- lapply(df[numeric_cols], as.numeric)
  df_list[[n_p + 1]] <- df
}
df08 <- dplyr::bind_rows(df_list)

#2018
for (n_p in 0:22) {
  
  df <- read_excel(paste0('../../data/production/raw/cna2018/CNA18_C_5_2.xlsx'), sheet = paste0('5.2.', n_p + 1), .name_repair = "unique_quiet")
  df$state <- province_name[n_p + 1]
  df$year <- 2018
  colnames(df) <- c('county_code', 'county', 
                    'total_cattlestock', 'female calves', 'male calves',
                    'young heifers', 'heifers', 'cows', 'young steers', 'steers',
                    'young bulls', 'bulls', 'ox', 'unaccounted', 
                    'state', 'year')
  df <- df %>% dplyr::select(-county_code)
  df <- df %>% slice(9:n())
  df <- df %>% slice(1:(n()-1))
  df <- df %>% mutate(across(everything(), ~ gsub("-", "0", .))) 
  numeric_cols <- c('total_cattlestock', 'female calves', 'male calves',
                    'young heifers', 'heifers', 'cows', 'young steers', 'steers',
                    'young bulls', 'bulls', 'ox', 'unaccounted', 'year')
  df <- df %>%  mutate(across(all_of(numeric_cols), as.numeric))
  df <- df %>% slice(1:(n() - 1))
  if (n_p == 0) { df18 <- df } else {df18 <- bind_rows(df18, df) }
}

#Append and correct names
df <- bind_rows(df02, df08, df18)
df$state <- toupper(df$state)
df$county <- toupper(df$county)
df$state <- trimws(df$state)
df$county <- trimws(df$county)

df$state <- df$state %>%
  gsub("Š", "U", .)
df$county <- df$county %>%
  gsub("Š", "U", .)
county_fix <- c(
  "1° DE MAYO" = "PRIMERO DE MAYO",
  "1º DE MAYO" = "PRIMERO DE MAYO",
  "12 DE OCTUBRE" = "DOCE DE OCTUBRE",
  "2 DE ABRIL" = "DOS DE ABRIL",
  "25 DE MAYO" = "VEINTICINCO DE MAYO",
  "25 DE  MAYO" = "VEINTICINCO DE MAYO",
  "9 DE JULIO" = "NUEVE DE JULIO",
  "ADOLFO GONZALES CHAVES" = "GONZALES CHAVES",
  "CORONEL BRANDSEN" = "BRANDSEN",
  "CHOS  MALAL" = "CHOS MALAL",
  "DR MANUEL BELGRANO" = "CAPITAL",
  "FLORENTINO AMEGHINO" = "AMEGHINO",
  "GENERAL ANGEL V PENALOZA" = "GENERAL ANGEL V. PENALOZA",
  "JUAN B ALBERDI" = "JUAN B. ALBERDI",
  "JUAN F IBARRA" = "JUAN F. IBARRA",
  "LEANDRO N ALEM" = "LEANDRO N. ALEM",
  "MAYOR LUIS J FONTANA" = "MAYOR LUIS J. FONTANA",
  "PRESIDENTE DE LA PLAZA" = "PRESIDENCIA DE LA PLAZA",
  "GENERAL JUAN MADARIAGA" = "GENERAL MADARIAGA",
  "GUAYASAN" = "GUASAYAN",
  "LA CANDELARIA" = "CANDELARIA",
  "CORONEL DE MARINA L ROSALES" = "CORONEL ROSALES",
  "GENERAL LA MADRID" = "GENERAL LAMADRID",
  "GENERAL JUAN F QUIROGA" = "GENERAL JUAN F. QUIROGA",
  "SENGUER" = "SENGUER",
  "GENERAL SARMIENTO" = "CAPITAN SARMIENTO",
  "LA CAPITAL" = "CAPITAL",
  "JOSE CLEMENTE PAZ" = "JOSE C. PAZ",
  "SAN MIGUEL DE TUCUMAN" = "CAPITAL",
  "JUAN MARTIN DE PUEYRREDON" = "CAPITAL",
  "SILPICA" = "SILIPICA",
  "LA BANDA" = "BANDA",
  "CHICALCO" = "CHICALCO",
  "O HIGGINS" = "O' HIGGINS",
  "GRAL BELGRANO" = "GENERAL BELGRANO",
  "LDOR GRAL SAN MARTIN" = "LIBERTADOR GENERAL SAN MARTIN",
  "CONCEPCION DE LA SIERRA" = "CONCEPCION",
  "DR. MANUEL BELGRANO" = "CAPITAL",
  "LEANDRO N.  ALEM" = "LEANDRO N. ALEM"
)

df <- df %>% mutate(county = recode(county, !!!county_fix))
df <- df %>% mutate(county = ifelse(state == "MISIONES" & county == "GENERAL BELGRANO", "GENERAL MANUEL BELGRANO", county))
df <- df %>% mutate(across(where(is.numeric), ~ replace_na(., 0)))
df <- df %>% mutate(across(where(is.character), ~ replace_na(., "")))

#Merge with geographic units
df_1 <- df
df_2 <- read_csv('../../data/landuse/clean/geographicunits_argentina.csv.gz',show_col_types = FALSE) %>% dplyr::select(county_id, county, state)
df_cattle <- df_2 %>% inner_join(df_1, by = c("county", "state")) %>% dplyr::select(-county, -state)
write_csv(df_cattle, '../../data/production/clean/cattlestock_argentina_county_census.csv.gz')



############ Cattle establishments (source: census 2002, 2008)

#2002
df02 <- data.frame()
for (n_p in 1:23) {
  df <- read_excel(paste0("../../data/production/raw/cna2002/eap/", province_list[n_p], ".xls"), sheet = 1, col_names = FALSE, skip = 2, .name_repair = "unique_quiet")
  df <- df[7:nrow(df), 1:5]
  df$state <- province_name[n_p]
  df$year <- 2002
  df <- df %>% rename(county = 1, variable = 2, total = 3, `with defined limits` = 4, `without defined limits` = 5) %>%
    dplyr::select(county, variable, total, `with defined limits`, `without defined limits`, state, year)
  df <- df %>% mutate(county = ifelse(is.na(county), lag(county, default = NA), county)) %>%
    drop_na() %>% mutate(across(c(total, `with defined limits`, `without defined limits`), ~ as.numeric(gsub("-", "0", .))))
  if (n_p == 1) {df02 <- df } else {df02 <- bind_rows(df02, df)}
}

#2008
df08 <- data.frame()
for (n_p in 1:23) {
  df <- read_excel(paste0("../../data/production/raw/cna2008/cna08_", province_list[n_p], ".xls"), sheet = 'cuadro 4.1',col_names = FALSE, skip = 2, .name_repair = "unique_quiet")
  df <- df[7:nrow(df), 1:5]
  df$state <- province_name[n_p]
  df$year <- 2008
  df <- df %>% rename(county = 1, variable = 2, total = 3, `with defined limits` = 4, `without defined limits` = 5) %>%
    dplyr::select(county, variable, total, `with defined limits`, `without defined limits`, state, year)
  df <- df %>% mutate(county = ifelse(is.na(county), lag(county, default = NA), county)) %>% drop_na() 
  df[c("total", "with defined limits", "without defined limits")] <- 
    lapply(df[c("total", "with defined limits", "without defined limits")], function(x) {
      x[x == "s"] <- 0
      as.numeric(x)
    })
  if (n_p == 1) {df08 <- df } else {df08 <- bind_rows(df08, df) }
}

#Append and correct names
df <- bind_rows(df02, df08)
df <- df %>%mutate(state = toupper(trimws(state)),county = toupper(trimws(county)))
patterns <- c("ý", "š", "1°")
replacements <- c("Y", "U", "1")

df <- df %>% mutate(state = stri_replace_all_regex(state, patterns, replacements, vectorize_all = FALSE),
                    county = stri_replace_all_regex(county, patterns, replacements, vectorize_all = FALSE))
df$county <- dplyr::recode(df$county,
                           "1 DE MAYO" = "PRIMERO DE MAYO",
                           "12 DE OCTUBRE" = "DOCE DE OCTUBRE",
                           "2 DE ABRIL" = "DOS DE ABRIL",
                           "25 DE MAYO" = "VEINTICINCO DE MAYO",
                           "25 DE  MAYO" = "VEINTICINCO DE MAYO",
                           "9 DE JULIO" = "NUEVE DE JULIO",
                           "ADOLFO GONZALES CHAVES" = "GONZALES CHAVES",
                           "CORONEL BRANDSEN" = "BRANDSEN",
                           "CHOS  MALAL" = "CHOS MALAL",
                           "DR MANUEL BELGRANO" = "CAPITAL",
                           "FLORENTINO AMEGHINO" = "AMEGHINO",
                           "GENERAL ANGEL V PENALOZA" = "GENERAL ANGEL V. PENALOZA",
                           "JUAN B ALBERDI" = "JUAN B. ALBERDI",
                           "JUAN F IBARRA" = "JUAN F. IBARRA",
                           "LEANDRO N ALEM" = "LEANDRO N. ALEM",
                           "MAYOR LUIS J FONTANA" = "MAYOR LUIS J. FONTANA",
                           "PRESIDENTE DE LA PLAZA" = "PRESIDENCIA DE LA PLAZA",
                           "GENERAL JUAN MADARIAGA" = "GENERAL MADARIAGA",
                           "GUAYASAN" = "GUASAYAN",
                           "LA CANDELARIA" = "CANDELARIA",
                           "CORONEL DE MARINA L ROSALES" = "CORONEL ROSALES",
                           "GENERAL LA MADRID" = "GENERAL LAMADRID",
                           "GENERAL JUAN F QUIROGA" = "GENERAL JUAN F. QUIROGA",
                           "SENGUER" = "SENGUER",
                           "GENERAL SARMIENTO" = "CAPITAN SARMIENTO",
                           "LA CAPITAL" = "CAPITAL",
                           "JOSE CLEMENTE PAZ" = "JOSE C. PAZ",
                           "SAN MIGUEL DE TUCUMAN" = "CAPITAL",
                           "JUAN MARTIN DE PUEYRREDON" = "CAPITAL",
                           "SILPICA" = "SILIPICA",
                           "LA BANDA" = "BANDA",
                           "CHICALCO" = "CHICALCO",
                           "O HIGGINS" = "O' HIGGINS",
                           "GRAL BELGRANO" = "GENERAL BELGRANO",
                           "LDOR GRAL SAN MARTIN" = "LIBERTADOR GENERAL SAN MARTIN",
                           "CONCEPCION DE LA SIERRA" = "CONCEPCION",
                           "DR. MANUEL BELGRANO" = "CAPITAL",
                           "LEANDRO N.  ALEM" = "LEANDRO N. ALEM"
)
df <- df %>% mutate(county = ifelse(county == 'GENERAL BELGRANO' & state == 'MISIONES', 'GENERAL MANUEL BELGRANO', county))
df[is.na(df)] <- 0
df$variable <- gsub("cabezas", "Cabezas", df$variable)

#Merge with geographic units
df_e1 <- df %>% filter(variable == 'EAP')
df_e2 <- df %>% filter(variable == 'Cabezas')
df_e <- df_e1 %>%  inner_join(df_e2, by = c("county", "state", "year"), suffix = c("_cattleestab", "_cattlestock_estab")) %>%
  dplyr::select(-variable_cattleestab, -variable_cattlestock_estab)
df_u <- read_csv('../../data/landuse/clean/geographicunits_argentina.csv.gz',show_col_types = FALSE) %>% dplyr::select(county_id, county, state)
df <- df_u %>% inner_join(df_e, by = c("county", "state")) %>% dplyr::select(-county, -state)
write_csv(df, '../../data/production/clean/cattleestab_argentina_county_census.csv.gz')



################################################## Brazil

############ Cattle stocks (source: PAM)

df <- read_excel("../../data/production/raw/tabela3939.xlsx", sheet = 1, .name_repair = "unique_quiet")
colnames(df) <- as.character(df[2, ])
df <- df[-c(1,2,3,4), ]
df <- df[-nrow(df), ]
colnames(df) <- c("county_id", "county",
                  1974:1979,
                  1980:1989,
                  1990:1999,
                  2000:2009,
                  2010:2019)
df <- df %>% mutate(county_id = paste0("BR", as.character(county_id)))
df <- df %>% dplyr::select(-county)
df <- df %>% pivot_longer(cols = -county_id,names_to = "year",values_to = "total_cattlestock")
df <- df %>% mutate(total_cattlestock = ifelse(total_cattlestock %in% c('...', ',', '-', ''), 0, total_cattlestock))
df_u <- read_csv('../../data/landuse/clean/geographicunits_brazil.csv.gz',show_col_types = FALSE) %>% dplyr::select(county_id, county, state)
df <- df %>%inner_join(df_u, by = "county_id") %>% dplyr::select(-county, -state)

df <- df %>% mutate(total_cattlestock = as.integer(total_cattlestock))
write_csv(df, "../../data/production/clean/cattlestock_brazil_county_ppm.csv.gz")


############ Cattle stocks (source: census)

#1995
df <- read_excel('../../data/production/raw/tabela323.xlsx', sheet = 1, .name_repair = "unique_quiet")
df <- df[-c(1,2,3,4), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id','county','species',
                  'total_cattlestock', '0_1ha_cattlestock','1_2ha_cattlestock','2_5ha_cattlestock','5_10ha_cattlestock',
                  '10_20ha_cattlestock','20_50ha_cattlestock','50_100ha_cattlestock',
                  '100_200ha_cattlestock','200_500ha_cattlestock','500_1000ha_cattlestock',
                  '1000_2000ha_cattlestock','2000_5000ha_cattlestock','5000_10000ha_cattlestock',
                  '10000_100000ha_cattlestock','100000ha_cattlestock','undeclared_cattlestock')
df <- df %>% mutate(county_id = paste0('BR', as.character(county_id)))
df <- df %>% dplyr::select(-county, -species)
df <- df %>% mutate(across(c(total_cattlestock, undeclared_cattlestock), 
                ~ifelse(. %in% c('X', '-', '..', '...'), 0, .)))
df <- df %>% mutate(year = 1995)
df1 <- df %>% dplyr::select(county_id, year, total_cattlestock)

#2006
df <- read_excel('../../data/production/raw/tabela922.xlsx', sheet = 2, .name_repair = "unique_quiet")
df <- df[-c(1,2,3,4,5,6), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id','county','pasture group','area group',
                  'total_cattlestock', '1_2_cattlestock','2_4_cattlestock','5_9_cattlestock','10_19_cattlestock',
                  '20_49_cattlestock','50_99_cattlestock','100_199_cattlestock',
                  '200_499_cattlestock','500_cattlestock','undeclared_cattlestock')
df <- df %>% mutate(county_id = paste0('BR', as.character(county_id)))
df <- df %>% dplyr::select(-'county',-'pasture group',-'area group')
df <- df %>% mutate(across(everything(), ~ replace(., . %in% c("X", "-", "..", "..."), 0)))
df <- df %>% mutate(year = 2006)
df2 <- df %>% dplyr::select('county_id','year','total_cattlestock')


#2017
df <- read_excel('../../data/production/raw/tabela6624.xlsx', sheet = 2, .name_repair = "unique_quiet")
df <- df[-c(1,2,3,4), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id','county','total_cattlestock')
df <- df %>% mutate(county_id = paste0('BR', as.character(county_id)))
df <- df %>% mutate(across(everything(), ~ replace(., . %in% c("X", "-", "..", "..."), 0)))
df <- df %>% mutate(year = 2017)
df3 <- df %>% dplyr::select(county_id, year, total_cattlestock)

#Append
df <- bind_rows(df1, df2, df3)
df_u <- read_csv('../../data/landuse/clean/geographicunits_brazil.csv.gz',show_col_types = FALSE) %>% dplyr::select(county_id, county, state)
dfc <- df_u %>% inner_join(df, by = "county_id") %>% dplyr::select(-county, -state)
write_csv(dfc, '../../data/production/clean/cattlestock_brazil_county_census.csv.gz')






############Cattle establishments (source: census)

#1995
df <- read_excel('../../data/production/raw/tabela312.xlsx', sheet = 1, .name_repair = "unique_quiet")
df <- df[-c(1,2,3,4,5), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id', 'county', 'producer group', 'total_agestab', 'total_cattleestab')
df <- df %>%mutate(county_id = paste0('BR', as.character(county_id)))
df <- df %>% dplyr::select(-county, -`producer group`)
df <- df %>%mutate(across(c(total_agestab, total_cattleestab), ~ifelse(. %in% c('X', '-', '..', '...'), 0, .)))
df <- df %>% mutate(year = 1995)
df1 <- df %>% dplyr::select(county_id, year, total_agestab, total_cattleestab)

#2006
df <- read_excel('../../data/production/raw/tabela922.xlsx', sheet = 1, .name_repair = "unique_quiet")
df <- df[-c(1,2,3,4,5,6), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id','county','pasture group','area group',
                  'total_cattleestab', '1_2_cattleestab','2_4_cattleestab','5_9_cattleestab','10_19_cattleestab',
                  '20_49_cattleestab','50_99_cattleestab','100_199_cattleestab',
                  '200_499_cattleestab','500_cattleestab','undeclared_cattleestab')
df <- df %>%mutate(county_id = paste0('BR', as.character(county_id)))
df <- df %>% dplyr::select(-'county',-'pasture group',-'area group')
df <- df %>%mutate(across(everything(), ~ replace(., . %in% c("X", "-", "..", "..."), 0)))
df <- df %>%mutate(year = 2006)
df2 <- df %>% dplyr::select('county_id','year','total_cattleestab')

#2017

df <- read_excel('../../data/production/raw/tabela6624.xlsx', sheet = 1, .name_repair = "unique_quiet")
df <- df[-c(1,2,3,4), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id','county','total_cattleestab')
df <- df %>%mutate(county_id = paste0('BR', as.character(county_id)))
df <- df %>% dplyr::select(-county)
df <- df %>%mutate(across(everything(), ~ replace(., . %in% c("X", "-", "..", "..."), 0)))
df <- df %>%mutate(year = 2017)
df3 <- df %>% dplyr::select(county_id, year, total_cattleestab)

#Append
df <- bind_rows(df1, df2, df3)
df_u <- read_csv('../../data/landuse/clean/geographicunits_brazil.csv.gz',show_col_types = FALSE) %>%dplyr::select(county_id, county, state)
dfe <- df_u %>% inner_join(df, by = "county_id") %>% dplyr::select(-county, -state)
write_csv(dfe, '../../data/production/clean/cattleestab_brazil_county_census.csv.gz')


############ Cattle prices (source: census)

#1995
file_list <- c('tabela327','tabela336','tabela330','tabela339','tabela333','tabela486')
variable_list <- c('purchases (head)', 'purchases (r)',
                   'sales (head)', 'sales (r)',
                   'slaughter (head)', 'slaughter (r)')
dfm <- map2_dfr(file_list, variable_list, function(file_name, var_name) {
  df <- read_excel(paste0('../../data/prices/raw/', file_name, '.xlsx'), sheet = 1, .name_repair = "unique_quiet")
  
  colnames(df) <- as.character(df[3, ])
  df <- df[-c(1,2,3,4), ]
  df <- df[-nrow(df), ]
  
  colnames(df) <- c('county_id', 'county', 'species', 'all_establishments', 'cattle_establishments')

  df <- df %>%
    mutate(
      county_id = paste0("BR", as.character(county_id)),
      variable = var_name
    ) %>%
    dplyr::select(county_id, all_establishments, cattle_establishments, variable)
  
  return(df)
})

df1 <- dfm %>% dplyr::select(county_id, value = all_establishments, variable) %>% mutate(group = 'all establishments')
df2 <- dfm %>% dplyr::select(county_id, value = cattle_establishments, variable) %>% mutate(group = 'cattle establishments')
df95 <- bind_rows(df1, df2) %>% mutate(year = 1995,value = as.numeric(value))

#2006
#Head
df <- read_excel('../../data/prices/raw/tabela928.xlsx', sheet = 1, .name_repair = "unique_quiet")
colnames(df) <- as.character(df[3, ])
df <- df[-c(1,2,3,4,5,6), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id', 'county', 'group', 'xxx', 'slaughter (head)', 'purchases (head)', 
                  'purchases stage 1 (head)', 'purchases stage 2 (head)')
df <- df %>% mutate(county_id = paste0("BR", as.character(county_id)))
df <- df %>% mutate(county_id = ifelse(county_id == "BRNA", lag(county_id), county_id))
df <- df %>% dplyr::select(-county, -xxx)
df <- df %>% mutate(group = str_replace_all(group, c('Total' = 'all establishments','Pecuária e criação de outros animais' = 'cattle establishments')))

df1 <- df %>% pivot_longer(
    cols = c('slaughter (head)', 'purchases (head)', 'purchases stage 1 (head)', 'purchases stage 2 (head)'),
    names_to = "variable",
    values_to = "value" )
df1 <- df1 %>% mutate(value = as.numeric(value))

#Value
df <- read_excel('../../data/prices/raw//tabela928.xlsx', sheet = 2, .name_repair = "unique_quiet")
colnames(df) <- as.character(df[3, ])
df <- df[-c(1,2,3,4,5,6), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id','county','group','xxx','slaughter (r)','purchases (r)', 'purchases stage 1 (r)','purchases stage 2 (r)')
df <- df %>% mutate(county_id = paste0("BR", as.character(county_id)))
df <- df %>% mutate(county_id = ifelse(county_id == "BRNA", lag(county_id), county_id))
df <- df %>% dplyr::select(-county, -xxx)
df <- df %>% mutate(group = str_replace_all(group, c('Total' = 'all establishments',
                                                     'Pecuária e criação de outros animais' = 'cattle establishments')))
df2 <- df %>% pivot_longer(cols = c('slaughter (r)','purchases (r)', 'purchases stage 1 (r)','purchases stage 2 (r)'),
               names_to = "variable", values_to = "value")
df2 <- df2 %>% mutate(value = as.numeric(value))%>%mutate(value = value * 1000)

#Head
df <- read_excel("../../data/prices/raw/tabela782.xlsx", sheet = 1, .name_repair = "unique_quiet")
colnames(df) <- as.character(df[3, ])
df <- df[-c(1,2,3,4,5), ]
df <- df[-nrow(df), ]
colnames(df) <- c("county_id", "county", "group", "born (head)", "victimized (head)", "slaughter 782 (head)", "purchases 782 (head)", "sales 782 (head)")
df <- df %>%
  mutate(county_id = paste0("BR", as.character(county_id))) %>%
  mutate(county_id = ifelse(county_id == "BRNA", lag(county_id), county_id)) %>%
  dplyr::select(-county, -`born (head)`, -`victimized (head)`) %>%
  mutate(group = str_replace_all(group, c(
    "Total" = "all establishments",
    "Pecuária e criação de outros animais" = "cattle establishments" )))
df3 <- df %>% pivot_longer(cols = c("slaughter 782 (head)", "purchases 782 (head)", "sales 782 (head)"),
                           names_to = "variable", values_to = "value") %>% mutate(value = as.numeric(value))

#Value
df <- read_excel('../../data/prices/raw/tabela782.xlsx', sheet = 2, .name_repair = "unique_quiet")
colnames(df) <- as.character(df[3, ])
df <- df[-c(1,2,3,4,5), ]
df <- df[-nrow(df), ]
colnames(df) <- c('county_id','county','group', 'born (r)','victimized (r)','slaughter 782 (r)','purchases 782 (r)','sales 782 (r)')
df <- df %>% mutate(county_id = paste0("BR", as.character(county_id)))
df <- df %>% mutate(county_id = ifelse(county_id == "BRNA", lag(county_id), county_id))
df <- df %>% dplyr::select(-'county',-'born (r)',-'victimized (r)')
df <- df %>% mutate(group = str_replace_all(group, c('Total' = 'all establishments',
    'Pecuária e criação de outros animais' = 'cattle establishments')))
df4 <- df %>%pivot_longer(cols = c('slaughter 782 (r)','purchases 782 (r)','sales 782 (r)'),
    names_to = "variable",values_to = "value")
df4 <- df4 %>% mutate(value = as.numeric(value))%>% mutate(value = value * 1000)
df06 <- bind_rows(df1, df2, df3, df4) %>%  mutate(year = 2006)


#2017
variable_list<-c('all cattle (head)','breeding stock (head)', 
              'sales <50 (head)', 'sales <50 (r)',
              'sales breeders >50 (head)','sales breeders >50 (r)',
              'sales calves >50 (head)','sales calves >50 (r)',
              'sales slaughter >50 (head)','sales slaughter >50 (r)')
n_f <- 0
dfm <- map2_dfr(0:(length(variable_list) - 1), variable_list, function(sheet_index, variable_name) {
  df <- read_excel('../../data/prices/raw/tabela6910.xlsx', sheet = sheet_index+1, .name_repair = "unique_quiet")
  colnames(df) <- as.character(df[3, ])
  df <- df[-c(1,2,3,4,5), ]
  df <- df[-nrow(df), ]
  colnames(df) <- c("county_id", "county", "xxx", "group", "value")
  df <- df %>%
    mutate(county_id = paste0("BR", as.character(county_id)),
           variable = variable_name, county_id = ifelse(county_id == "BRNA", lag(county_id), county_id)) %>%
    dplyr::select(-county, -xxx) %>% mutate(group = str_replace_all(group, c(
      "Total" = "all establishments", "Pecuária e criação de outros animais" = "cattle establishments" )))
  df
})
dfm <- dfm %>%  mutate(value = as.numeric(value))
df1 <- dfm %>% filter(variable %in% c("sales <50 (r)", "sales slaughter >50 (r)", "sales calves >50 (r)", "sales breeders >50 (r)")) %>%
  mutate(value = value * 1000)
df2 <- dfm %>% filter(!variable %in% c("sales <50 (r)", "sales slaughter >50 (r)", "sales calves >50 (r)", "sales breeders >50 (r)"))
df17 <- bind_rows(df1, df2) %>% mutate(year = 2017)

#Append all years
column_order <- c("year", "county_id", "variable", "group", "value")
df <- bind_rows(
  df95 %>% dplyr::select(all_of(column_order)),
  df06 %>% dplyr::select(all_of(column_order)),
  df17 %>% dplyr::select(all_of(column_order)))
write_csv(df, "../../data/production/clean/cattlevalues_brazil_county_census.csv.gz")



############ Cattle prices (CEPEA)

df1 <- read_excel('../../data/prices/raw/cepea/CEPEA_20220118152202.xls', sheet = 2) %>% mutate(price_type = "calf MS (h)")
df2 <- read_excel('../../data/prices/raw/cepea/CEPEA_20220118152325.xls', sheet = 2) %>% mutate(average_weight_kg = "NA",price_type = "calf SP (h)")
df2 <- df2 %>%mutate(average_weight_kg = NA_real_,price_type = "calf SP (h)")
df3 <- read_excel('../../data/prices/raw/cepea/CEPEA_20220118152944.xls', sheet = 2) %>%
  mutate(price_usd = price_usd / 15,average_weight_kg = "NA",price_type = "fed cattle SP (kg)")
df3 <- df3 %>% mutate(price_usd = price_usd / 15,average_weight_kg = NA_real_,price_type = "fed cattle SP (kg)")
df <- bind_rows(df1, df2, df3)
write_csv(df, "../../data/prices/clean/cepea.csv.gz")


############ Slaughter

#Head
df <- read_excel("../../data/production/raw/tabela1092.xlsx", sheet = 1, .name_repair = "unique_quiet")
colnames(df) <- as.character(df[3, ])
df <- df[-c(1,2,3,4,5), ]
df <- df[-nrow(df), 4:ncol(df)]
df <- as.data.frame(t(df))
df <- df %>%
  tibble::rownames_to_column("year") %>%
  rename(slaughter_h = V1) %>%
  mutate(
    year = str_replace_all(year, c(
      "1º trimestre " = "",
      "2º trimestre " = "",
      "3º trimestre " = "",
      "4º trimestre " = ""
    )),
    year = as.integer(year),
    slaughter_h = as.numeric(slaughter_h)
  ) %>%
  group_by(year) %>%
  summarise(slaughter_h = sum(slaughter_h, na.rm = TRUE)) %>%
  ungroup()
df1 <- df

#Weight
df <- read_excel("../../data/production/raw/tabela1092.xlsx", sheet = 2, .name_repair = "unique_quiet")
colnames(df) <- as.character(df[3, ])
df <- df[-c(1,2,3,4,5), ]
df <- df[-nrow(df), 4:ncol(df)]
df <- as.data.frame(t(df))
df <- df %>%
  rownames_to_column("year") %>%
  rename(slaughter_t = V1) %>%
  mutate(
    year = str_replace_all(year, c(
      "1º trimestre " = "",
      "2º trimestre " = "",
      "3º trimestre " = "",
      "4º trimestre " = ""
    )),
    year = as.integer(year),
    slaughter_t = as.numeric(slaughter_t) * 0.001
  ) %>%
  group_by(year) %>%
  summarise(slaughter_t = sum(slaughter_t, na.rm = TRUE)) %>%
  ungroup()
df2 <- df

# Merge with df1 from previous step
df <- df1 %>%inner_join(df2, by = "year")

# Export
write_csv(df, "../../data/production/clean/slaughter_brazil_nation_ptam.csv.gz")














